/**********************************************************************/
/*
   	Author: Karan Makkar, Michelle Han, Adapted from Nikhil Kumar
	Last Updated: July 2024
   	Description: Cleans raw September 2018 SUSENAS data. Outputs deidentified
	cleaned SUSENAS data merged with PMO data.

	This cleaning file should output 2 different datasets:
	1. Cleaned Susenas data:
	sus_sep18_deid_clean
	2. Subset of Susenas person-batch data matched with PMO:
	sus_sep18_deid_clean_merged

*/
/**********************************************************************/


*******************************************
* Setup

* Pull in the project filepath globals, except when $master_run is "1"
* (presumably set by a master do-file that has already included them).
	if ("$master_run" != "1") include "./Do/SET_FILEPATHS.do"

* Close any log that is still open; capture swallows the error when none is.
	capture log close

* Date stamp built from today's date, used as the log filename prefix.
	global prefix: display %tdCYND td(`c(current_date)')
	log using "$KP_logs/${prefix}_clean_SUSENAS_sep2018.txt", replace text

*******************************************
* Load Data

* Consumption data: keep the merge key and the food / non-food totals from the
* blok43 file and park them in a tempfile for the merge below.
	import dbase "$KP_deid_susenas/Raw/Household/dbf/sep18/blok43_diseminasi.dbf", clear

	* The key is named in full (URUT2). Writing only URUT relies on Stata's
	* variable-name abbreviation, which fails under -set varabbrev off- or as
	* soon as a second variable starting with URUT is present.
	keep URUT2 FOOD NONFOOD

	* Lower-case the names so the key matches urut2 in the .dta files.
	rename (URUT2 FOOD NONFOOD) (urut2 food nonfood)

	tempfile cons
	save `cons'

* Individual-level file, with the household file and the consumption totals
* attached on urut2. assert(3) aborts the run unless every observation
* matches in both merges.
	use "$KP_deid_susenas/Raw/Individual/SUSENAS_SEP18_deid.dta", clear
	merge m:1 urut2 using "$KP_deid_susenas/Raw/Household/susenas18sep_rt_diseminasi.dta", assert(3) nogen
	merge m:1 urut2 using `cons', assert(3) nogen


*******************************************
* Demographics

* Gender: two mirror-image 0/1 indicators derived from m405
* (1 = male, 2 = female according to the labels attached here).
* NOTE(review): when m405 is missing both indicators are 0, so the person is
* labelled "Female" in gender and "Male" in female -- confirm m405 is complete.
	gen gender = (m405 == 1)
	label define gender 0 "Female" 1 "Male"
	label values gender gender

	gen female = (m405 == 2)
	label define female 0 "Male" 1 "Female"
	label values female female


* Education: map the attainment code in m1404 to approximate years of schooling.
* Codes not listed below (including missing) leave school_years missing.
	gen educ = m1404
	gen school_years = .
	replace school_years = 0    if educ == 1
	replace school_years = 6    if inlist(educ, 2, 3, 4, 5)             // Paket A, SDLB, SD, MI
	replace school_years = 9    if inlist(educ, 6, 7, 8, 9)             // Paket B, SMP LB, SMP, MTs
	replace school_years = 12   if inlist(educ, 10, 11, 12, 13, 14, 15) // Paket C, SMLB, SMA, MA, SMK, MAK
	replace school_years = 13.5 if educ == 16                           // Diploma I/II
	replace school_years = 15   if educ == 17                           // Diploma III
	replace school_years = 16   if inlist(educ, 18, 19)                 // Diploma IV / SI
	replace school_years = 18   if inlist(educ, 20, 21)                 // SII/Profesi
	replace school_years = 20   if educ == 22                           // SIII

* Indicator for being "educated".
* NOTE(review): two things to confirm before relying on this variable:
*   (1) Stata treats missing as larger than any number, so every observation
*       with missing school_years gets educated = 1;
*   (2) the comparison is strictly > 12, while the value label below reads
*       "12 or More", so people with exactly 12 years are coded 0.
* The values are deliberately left as they are because the datasignature
* checks at export time pin the exact contents of the dataset.
	gen educated = (school_years > 12)
	label define educated 0 "Less than 12 Years Schooling" 1 "12 or More Years of Schooling"
	label values educated educated

* Household size
	gen hh_size = m301

* Lives in the kabupaten of birth (copied from m603 as-is)
	gen born_kab = m603

* Age, plus 20-year bands (0-20, 21-40, 41-60, 61-80, 81-100 -> 1..5);
* values outside these ranges are carried over unchanged by recode.
	gen age = m407
	recode age (0/20 = 1) (21/40 = 2) (41/60 = 3) (61/80 = 4) (81/100 = 5), generate(age_cat)

* Indicator for age 30 or below (missing age is coded 0, since . <= 30 is false)
	gen young = (age <= 30)
	label define young 0 "Over 30 Years Old" 1 "30 and Under"
	label values young young

*******************************************
* Employment

* Employment status comes from m1705; the codes are spelled out in the
* value label defined below.
	gen employment_status = m1705

* One indicator per status of interest
	gen self_employed      = (m1705 == 1)
	gen business_owner     = inlist(m1705, 2, 3)
	gen perm_worker        = (m1705 == 4)
	gen family_unpaid      = (m1705 == 6)
	gen self_emp_bus_owner = inlist(m1705, 1, 2, 3)

* Employed = statuses 1-5; family/unpaid workers (6) and missing count as 0.
	gen employed = inlist(employment_status, 1, 2, 3, 4, 5)

	label define emp ///
			1 "Self-employed" ///
			2 "Business with temp emp" ///
			3 "Business with perm emp" ///
			4 "Permanent Worker" ///
			5 "Freelancer" ///
			6 "Family/unpaid worker", replace
	label values employment_status emp
	tabulate employed employment_status, missing row

* Hours worked (m1706), forced to zero for everyone not counted as employed
	gen hrs_worked_all = cond(employed == 0, 0, m1706)


*******************************************
* Consumption
*
* For food, non-food and total consumption the same recipe is applied:
*   1. per-capita amount (household total divided by m301) and its log;
*   2. an "adjusted" copy whose values are capped at the 1st and 99th
*      percentiles of the strictly positive observations, then divided by 1000;
*   3. the log of the adjusted copy.
* The percentile cut-offs are copied into temporary scalars right after
* -summarize- so that the replacements do not depend on r() staying intact.
* NOTE(review): values are capped at the cut-offs (not dropped) although the
* variable labels say "trimmed"; and because the percentiles are computed on
* positive values only while the cap applies to every non-missing value,
* zero/negative amounts are raised to the 1st percentile. Confirm both are
* intended.

	tempname cut_lo cut_hi

* food
	gen cons_food = food/m301
	gen lcon_food = log(cons_food)

	gen cons_adj_food = cons_food
	summarize cons_adj_food if cons_adj_food > 0, detail
	scalar `cut_lo' = r(p1)
	scalar `cut_hi' = r(p99)
	replace cons_adj_food = `cut_hi' if cons_adj_food > `cut_hi' & !mi(cons_adj_food)
	replace cons_adj_food = `cut_lo' if cons_adj_food < `cut_lo' & !mi(cons_adj_food)
	replace cons_adj_food = cons_adj_food / 1000
	label variable cons_adj_food "Adjusted Food Consumption (in 1000s of rupiah, trimmed of 1st and 99th pct)"
	summarize cons_adj_food, detail
	gen lcon_adj_food = log(cons_adj_food)

* non-food
	gen cons_nfood = nonfood/m301
	gen lcon_nfood = log(cons_nfood)

	gen cons_adj_nfood = cons_nfood
	summarize cons_adj_nfood if cons_adj_nfood > 0, detail
	scalar `cut_lo' = r(p1)
	scalar `cut_hi' = r(p99)
	replace cons_adj_nfood = `cut_hi' if cons_adj_nfood > `cut_hi' & !mi(cons_adj_nfood)
	replace cons_adj_nfood = `cut_lo' if cons_adj_nfood < `cut_lo' & !mi(cons_adj_nfood)
	replace cons_adj_nfood = cons_adj_nfood / 1000
	label variable cons_adj_nfood "Adjusted Non-Food Consumption (in 1000s of rupiah, trimmed of 1st and 99th pct)"
	summarize cons_adj_nfood, detail
	gen lcon_adj_nfood = log(cons_adj_nfood)

* total (food + non-food, per capita)
	gen tot_cons = cons_food + cons_nfood
	gen ltot_cons = log(tot_cons)

	gen tot_adj_cons = tot_cons
	summarize tot_adj_cons if tot_adj_cons > 0, detail
	scalar `cut_lo' = r(p1)
	scalar `cut_hi' = r(p99)
	replace tot_adj_cons = `cut_hi' if tot_adj_cons > `cut_hi' & !mi(tot_adj_cons)
	replace tot_adj_cons = `cut_lo' if tot_adj_cons < `cut_lo' & !mi(tot_adj_cons)
	replace tot_adj_cons = tot_adj_cons / 1000
	label variable tot_adj_cons "Adjusted Total Consumption (in 1000s of rupiah, trimmed of 1st and 99th pct)"
	summarize tot_adj_cons, detail
	gen ltot_adj_cons = log(tot_adj_cons)

*******************************************
* Telecom

* Uses the internet (m1205 equal to 1; anything else, including missing, is 0)
	gen use_internet = (m1205 == 1)

* Reasons for using the internet. Each m1206x item is a string that is
* compared with its own letter; some reasons are pooled into one indicator.
	gen internet_info      = (m1206a == "A")
	gen internet_social    = (m1206c == "C") | (m1206d == "D")
	gen internet_buy_sell  = (m1206e == "E") | (m1206h == "H")
	gen internet_entertain = (m1206f == "F")
	gen use_internet_bank  = (m1206g == "G")

*******************************************
* Program Receipt
* Each indicator is 1 for the listed answer code(s) and 0 otherwise
* (missing answers therefore count as 0).

* m2003: received BPNT
	gen ever_BPNT = (m2003 == 1)
	tabulate ever_BPNT

* m2007: Kartu Keluarga Sejahtera (KKS) -- answer codes 1 or 2
	gen receive_KKS = inlist(m2007, 1, 2)
	tabulate receive_KKS

* m2008: PKH in the last year
	gen receive_pkh_year = (m2008 == 1)
	tabulate receive_pkh_year

* m2009b: PKH currently
	gen receive_pkh_now = (m2009b == 1)
	tabulate receive_pkh_now

*******************************************
* Duplicate IDs
* Count, for every observation, how many other observations share its anon_id4.
  duplicates tag anon_id4, generate(anon_id4_dupe)
  tabulate anon_id4_dupe

* Remove every copy of a repeated non-missing anon_id4 (no copy is kept);
* observations with a missing anon_id4 stay in the data.
  drop if anon_id4_dupe > 0 & !missing(anon_id4)
  drop anon_id4_dupe

/*----------------------------------------------------*/
              * Export cleaned data
/*----------------------------------------------------*/

* Save the cleaned file only when its data signature equals the reference
* value below; otherwise abort so that a dataset that differs from the
* reference run is never written to disk.
  rename fwt weight
  datasignature
  	if "`r(datasignature)'" != "277402:389(76925):27454566:382097936" {
    	di as err "Careful, your machine produces a different dataset"
    	* Abort with a genuine error code. -stop- is not a Stata command, so it
    	* only halted the run via an "unrecognized command" r(199) error.
    	error 459
      	}
  	else {
    	save "$KP_deid_susenas/Clean/sus_sep18_deid_clean.dta", replace
		}
  

	/*----------------------------------------------------*/
	          * Merge with admin data and export
	/*----------------------------------------------------*/

	* Keep only identified respondents and drop the raw questionnaire
	* variables (every name starting with m1 ... m9) before merging.
	gen sus_round = 2
	keep if !mi(anon_id4)
	drop m1* m2* m3* m4* m5* m6* m7* m8* m9*

	* Merge with PMO data
	* NOTE(review): -force- silently turns any non-numeric date_incentive
	* value into missing -- confirm that is acceptable.
	destring date_incentive, replace force

	* One survey record can match many PMO rows; only matched rows are kept.
    merge 1:m anon_id4 using "$KP_deid_admin/Clean/pmo_b1-22_clean_long_deid.dta", nogen keep(3)

	* Save only when the data signature equals the reference value below;
	* otherwise abort so a differing dataset is never written to disk.
	compress
	datasignature
    	if "`r(datasignature)'" != "44323:123(50952):3354667459:91849211" {
    	di as err "Careful, your machine produces a different dataset"
    	* Abort with a genuine error code. -stop- is not a Stata command, so it
    	* only halted the run via an "unrecognized command" r(199) error.
    	error 459
      	}
  	else {
    	save "$KP_deid_susenas/Clean/sus_sep18_deid_clean_merged.dta", replace
		}
	
